Intro to Urban Analytics

Bon Woo Koo & Subhrajit Guhathakurta

2022-08-20

Environment Setting

# Import required packages
library(tidyverse)
library(tmap)
library(ggplot2)
library(units)
library(sf)
library(leaflet)
library(tidycensus)
library(leafsync)
library(dbscan)
library(sfnetworks)
library(tigris)
library(tidygraph)
library(plotly)

wd <- file.path(Sys.getenv('setwd'),"work/working/School/UA_2022/external/Lab/module_2")
setwd(eval(wd))
tmap_mode('view')

What is Open Street Map

https://journal.r-project.org/archive/2013/RJ-2013-005/RJ-2013-005.pdf

https://r-spatial.org/r/2019/09/26/spatial-networks.html

https://cran.r-project.org/web/packages/dodgr/vignettes/dodgr.html

library(osmdata)
## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
# Get bounding box coordinates for Atlanta
bb <- getbb('Atlanta, GA')

# To see where bb covers
bb_sf <- bb %>% t %>% data.frame() %>% 
  st_as_sf(coords = c("x", "y"), crs = 4326) %>% 
  st_bbox() %>% 
  st_as_sfc()

tm_shape(bb_sf) + tm_borders()

OSM wiki provides a detailed description on various ‘key-value’ pairs. To download all possible key:value pairs, you can insert available_tags("highway") instead of manually specifying a list of values. One caveat is that, using all available tags will generate a large data, significantly slowing down the processing speed.

# Get OSM road data
osm_road <- opq(bbox = bb) %>%
  add_osm_feature(key = 'highway', 
                  value = c("motorway", "trunk", "primary", "secondary", "tertiary", "unclassified",
                            "residential")) %>%
  osmdata_sf() %>% 
  osm_poly2line()

names(osm_road)
## [1] "bbox"              "overpass_call"     "meta"             
## [4] "osm_points"        "osm_lines"         "osm_polygons"     
## [7] "osm_multilines"    "osm_multipolygons"
tm_shape(osm_road$osm_lines) + tm_lines(col = "highway")
table(osm_road$osm_lines$highway)
## 
##     motorway      primary  residential    secondary     tertiary        trunk 
##          797          739        15144         2485         1776          476 
## unclassified 
##          402

Defining a custom bounding box

Calculating network centrality and betweenness of the network we just downloaded requires a bit too long time for this class. For class exercise, let’s limit the bounding box to a smaller area and download only road networks (see this to check what I mean by road networks). This technique of generating your own bounding box can also be useful if you have a specific area of interest that doesn’t overlap well with commonly used boundaries.

You need to define two points for bounding box, one point at the lower left corner and the other at the upper right corner. You can go to Google Maps, right-click on a point on map, and copy the XY coordinate. Let’s store them in p1 and p2.

# p1 is lower left corner, p2 is the upper right corner
p1 <- c(33.746217847959734, -84.40851957882589)
p2 <- c(33.785889694219634, -84.36354430149285)

You will then need to format the two coordinates into the same format as bb, the bounding box object we created above. Then, let’s convert it into sf object.

# Custom BB
my_bb <- matrix(c(p1[2], p1[1],
                  p2[2], p2[1]), ncol = 2)

colnames(my_bb) <- c("min", "max")
rownames(my_bb) <- c("x", "y")

# Custom BB to sf
my_bb %>% t %>% data.frame() %>% 
  st_as_sf(coords = c("x", "y"), crs = 4326) %>% 
  st_bbox() %>% 
  st_as_sfc() %>% 
  tm_shape(.) + tm_borders()
# Get OSM road data
osm_sml <- opq(bbox = my_bb) %>%
  add_osm_feature(key = 'highway', 
                  value = c("motorway", "trunk", "primary", "secondary", "tertiary", "unclassified",
                            "residential")) %>%
  osmdata_sf() %>% 
  osm_poly2line()

osm_sml_pal <- colorFactor(palette = "Spectral", domain = osm_sml$osm_lines$highway)

leaflet() %>% 
  addProviderTiles(providers$CartoDB.DarkMatter) %>% 
  addPolylines(data = osm_sml$osm_lines, weight = 3, opacity = 0.9, color = ~osm_sml_pal(highway)) %>% 
  addLegend(position = "bottomright", pal = osm_sml_pal, 
            values = osm_sml$osm_lines$highway, title = "Highway Categories")

Converting OSM data into a graph

net <- sfnetworks::as_sfnetwork(osm_sml$osm_lines, directed = FALSE)
print(net)
## # A sfnetwork with 1670 nodes and 1611 edges
## #
## # CRS:  EPSG:4326 
## #
## # An undirected multigraph with 247 components with spatially explicit edges
## #
## # Node Data:     1,670 × 1 (active)
## # Geometry type: POINT
## # Dimension:     XY
## # Bounding box:  xmin: -84.41749 ymin: 33.73862 xmax: -84.3525 ymax: 33.7971
##               geometry
##            <POINT [°]>
## 1 (-84.39738 33.76535)
## 2 (-84.39613 33.76534)
## 3 (-84.40845 33.77004)
## 4 (-84.40837 33.77255)
## 5   (-84.3786 33.7669)
## 6 (-84.37793 33.76562)
## # … with 1,664 more rows
## #
## # Edge Data:     1,611 × 122
## # Geometry type: LINESTRING
## # Dimension:     XY
## # Bounding box:  xmin: -84.41749 ymin: 33.73862 xmax: -84.3525 ymax: 33.7971
##    from    to osm_id name  abando… access access… alt_na… attrib… bicycle bridge
##   <int> <int> <chr>  <chr> <chr>   <chr>  <chr>   <chr>   <chr>   <chr>   <chr> 
## 1     1     2 92346… Mill… <NA>    <NA>   <NA>    <NA>    <NA>    yes     <NA>  
## 2     3     4 92346… Trav… <NA>    <NA>   <NA>    <NA>    <NA>    <NA>    <NA>  
## 3     5     6 92354… Rena… <NA>    priva… <NA>    <NA>    <NA>    <NA>    <NA>  
## # … with 1,608 more rows, and 111 more variables: bridge.name <chr>, bus <chr>,
## #   change <chr>, change.backward <chr>, change.forward <chr>,
## #   change.lanes <chr>, change.lanes.backward <chr>,
## #   change.lanes.forward <chr>, covered <chr>, cycleway <chr>,
## #   cycleway.both <chr>, cycleway.est_width <chr>, cycleway.left <chr>,
## #   cycleway.right <chr>, destination <chr>, destination.lanes <chr>,
## #   destination.lanes.forward <chr>, destination.ref.lanes <chr>,
## #   embedded_rails <chr>, expressway <chr>, fee <chr>, fixme <chr>, foot <chr>,
## #   gatech.PRKG_NUM <chr>, goods <chr>, HFCS <chr>, hgv <chr>,
## #   hgv.minaxles <chr>, highway <chr>, highway_1 <chr>, horse <chr>, hov <chr>,
## #   hov.lanes <chr>, incline <chr>, junction <chr>, lanes <chr>,
## #   lanes.backward <chr>, lanes.forward <chr>, layer <chr>, level <chr>,
## #   lit <chr>, loc_name <chr>, maxheight <chr>, maxspeed <chr>,
## #   maxspeed.type <chr>, maxweight <chr>, minspeed <chr>, motor_vehicle <chr>,
## #   motor_vehicle.conditional <chr>, motor_vehicle.lanes <chr>,
## #   motorcycle.lanes <chr>, name.etymology.wikidata <chr>, name_1 <chr>,
## #   noname <chr>, note <chr>, note.lanes <chr>, note.name <chr>,
## #   official_name <chr>, old_name <chr>, oneway <chr>, oneway_1 <chr>,
## #   parking <chr>, parking.lane.both <chr>, parking.lane.left <chr>,
## #   parking.lane.left.parallel <chr>, parking.lane.right <chr>,
## #   parking.lanes.both <chr>, placement <chr>, placement.end <chr>,
## #   placement.start <chr>, postal_code <chr>, psv <chr>, rcn_ref <chr>,
## #   ref <chr>, reg_name <chr>, short_name <chr>, sidewalk <chr>,
## #   smoothness <chr>, source <chr>, source.maxspeed <chr>, surface <chr>,
## #   taxi <chr>, tiger.cfcc <chr>, tiger.county <chr>, tiger.name_base <chr>,
## #   tiger.name_base_1 <chr>, tiger.name_base_2 <chr>, tiger.name_base_3 <chr>,
## #   tiger.name_base_4 <chr>, tiger.name_base_5 <chr>,
## #   tiger.name_direction_prefix <chr>, tiger.name_direction_suffix <chr>,
## #   tiger.name_direction_suffix_1 <chr>, tiger.name_type <chr>,
## #   tiger.name_type_1 <chr>, tiger.reviewed <chr>, tiger.separated <chr>,
## #   tiger.source <chr>, tiger.tlid <chr>, tiger.upload_uuid <chr>, …
leaflet() %>% 
  addProviderTiles(providers$CartoDB.DarkMatter) %>% 
  addPolylines(data = net %>% st_as_sf('edges'), col = "grey", weight = 3, opacity = 0.9) %>% 
    addCircles(data = net %>% st_as_sf('nodes'), fillColor = 'yellow', 
               stroke = F, radius = 20, fillOpacity = 0.7) 

Cleaning network

Simplifying network

This content is heavily based on this sfnetwork tutorial

Multiple edges can connect the same pair of nodes, called multiple edges. There can also be loops that starts and ends at the same node (e.g., think of a cul-de-sac). The former case can be detected using edge_is_multiple() and the latter using edge_is_loop().

When removing a set of multiple edges using functions shown below, they always keep the first edge and discard others. By arranging the order of edges for each set of multiple edges, you can specify which edge you want to preserve.

This way of simplification means that, when the multiple edges within a set have different attributes, all attribute information except for the preserved one would be lost. In such cases, you can merge those edges. Then, the output would have the geometry of the first edge in the set, but the attributes would be some summary of all the edges in the set. to_spatial_simple() function does this work.

# Let's simplify our network
simple_net <- net %>%
  activate("edges") %>%
  filter(!edge_is_multiple()) %>%
  filter(!edge_is_loop())

message(str_c("Before simplification, there were ", net %>% st_as_sf("edges") %>% nrow(), " edges. \n",
            "After simplification, there are ", simple_net %>% st_as_sf("edges") %>% nrow(), " edges."))
## Before simplification, there were 1611 edges. 
## After simplification, there are 1601 edges.

Subdivide edges

When as_sfnetwork() function converts an sf linestrings, the nodes are defined as the endpoints of each linestring. If you zoom into Midtown in the map above, you will see that there are many intersections that do not have nodes, which are errors. We can use ``

# Subdivision
subdiv_net <- convert(simple_net, sfnetworks::to_spatial_subdivision)
## Warning: to_spatial_subdivision assumes attributes are constant over geometries
# Add a custom index
nrow_nodes <- subdiv_net %>% st_as_sf("nodes") %>% nrow()
subdiv_net <- subdiv_net %>% activate("nodes") %>% mutate(custom_id = seq(1,nrow_nodes))

subdiv_map <- leaflet() %>% 
  addProviderTiles(providers$CartoDB.DarkMatter) %>% 
  setView( -84.3854, 33.7768, zoom = 14) %>% # zooming in to show more details
  addPolylines(data = subdiv_net %>% st_as_sf('edges'), col = "grey", weight = 3, opacity = 0.9) %>% 
    addCircles(data = subdiv_net %>% st_as_sf('nodes'), fillColor = 'yellow', stroke = F, radius = 20, fillOpacity = 0.7) 

subdiv_map

Delete pseudo nodes

smoothed_net <- convert(subdiv_net, sfnetworks::to_spatial_smooth)

# Extract removed points
removed <- setdiff(subdiv_net %>% st_as_sf('nodes') %>% pull(custom_id), 
                   smoothed_net %>% st_as_sf('nodes') %>% pull(custom_id))

smooth_map <- leaflet() %>% 
  addProviderTiles(providers$CartoDB.DarkMatter) %>% 
  setView( -84.3854, 33.7768, zoom = 14) %>% # zooming in to show more details
  addPolylines(data = smoothed_net %>% st_as_sf('edges'), col = "grey", weight = 3, opacity = 0.9) %>% 
    addCircles(data = smoothed_net %>% st_as_sf('nodes'), fillColor = 'yellow', stroke = F, radius = 20, fillOpacity = 0.7) %>% 
    addCircles(data = subdiv_net %>% st_as_sf("nodes") %>% filter(custom_id %in% removed), 
               fillColor = "red", stroke = F, radius = 15, fillOpacity = 0.8)

leafsync::sync(subdiv_map, smooth_map)

Simplify intersections (optional)

# Extract node coordinates
node_coords = smoothed_net %>%
  st_transform(crs = 26967) %>% 
  activate("nodes") %>%
  st_coordinates()

# Cluster nodes
clusters = dbscan(node_coords, eps = 20, minPts = 1)$cluster

# Add the cluster information back to the original network
clustered = smoothed_net %>%
  activate("nodes") %>%
  mutate(cls = clusters) %>% 
  # 
  mutate(cmp = group_components())

contracted = convert(
  clustered,
  sfnetworks::to_spatial_contracted,
  cls, cmp,
  simplify = TRUE
)


before <- leaflet() %>% 
  addProviderTiles(providers$CartoDB.DarkMatter) %>% 
  setView( -84.3854, 33.7768, zoom = 14) %>% # zooming in to show more details
  addPolylines(data = net %>% st_as_sf('edges'), col = "grey", weight = 3, opacity = 0.9) %>% 
    addCircles(data = net %>% st_as_sf('nodes'), fillColor = 'yellow', stroke = F, radius = 20, fillOpacity = 0.7) %>% 
  addControl(html = htmltools::HTML("Raw download from OSM"))

after <- leaflet() %>% 
  addProviderTiles(providers$CartoDB.DarkMatter) %>% 
  setView( -84.3854, 33.7768, zoom = 14) %>% # zooming in to show more details
  addPolylines(data = contracted %>% st_as_sf('edges'), col = "grey", weight = 3, opacity = 0.9) %>% 
    addCircles(data = contracted %>% st_as_sf('nodes'), fillColor = 'yellow', stroke = F, radius = 20, fillOpacity = 0.7) %>% 
  addControl(html = htmltools::HTML("Fully processed"))

leafsync::sync(before, after)

Calculate centrality

network_char <- contracted %>% 
  activate("edges") %>%
  mutate(weight = edge_length()) %>% 
  mutate(edge_bc = centrality_edge_betweenness(weights = weight, directed = F)) %>%
  activate("nodes") %>% 
  mutate(node_bc = centrality_betweenness(weights = weight, directed = F))
## Warning in betweenness(graph = graph, v = V(graph), directed = directed, :
## 'nobigint' is deprecated since igraph 1.3 and will be removed in igraph 1.4
# Edge betweenness
bet_pal_edge <- colorNumeric(palette = "Reds", domain = network_char %>% activate("edges") %>% pull(edge_bc), n = 6)

# Node betweenness
bet_pal_node <- colorNumeric(palette = "Reds", domain = network_char %>% activate("nodes") %>% pull(node_bc), n = 6)

# Map
leaflet() %>% 
  addProviderTiles(providers$CartoDB.DarkMatter) %>%
  setView( -84.3854, 33.7768, zoom = 14) %>% # zooming in to show more details
  addPolylines(data = network_char %>% st_as_sf("edges"), 
               color = ~bet_pal_edge(network_char %>% st_as_sf('edges') %>% pull(edge_bc)), weight = 3, opacity = 0.7) %>% 
  addCircles(data = network_char %>% st_as_sf("nodes"), 
               fillColor = ~bet_pal_node(network_char %>% st_as_sf('nodes') %>% pull(node_bc)), stroke = F, fillOpacity = 0.7, 
             radius = network_char %>% st_as_sf("nodes") %>% with(.$node_bc/1000)) # 1000 is selected to make the max value roughly equal to 100
# Try 2 more other centrality measures.
# Find good ways to calculate the radius argument in addCircles() function for visually pleasing maps for the 2 centrality measures.

Shortest path

# Start point
start_p <- st_point(c(-84.40364459476174,33.776160322717544)) %>% st_sfc(crs = 4326) # CRC at Georgia Tech
# End point
target_p <- st_point(c(-84.37639335217811, 33.75718076235044)) %>% st_sfc(crs = 4326) # MLK National Historical Park
# Get the shortest path
paths = st_network_paths(net, from = start_p, to = target_p)
# Extract shortest path
paths_sf <- net %>%
  slice(paths$node_paths[[1]])

# Visualize
leaflet() %>% 
  addProviderTiles(providers$CartoDB.DarkMatter) %>% 
  addPolylines(data = net %>% st_as_sf("edges"), color = 'grey', weight = 2, opacity = 0.7) %>% 
  addPolylines(data = paths_sf %>% st_as_sf("edges"), color = "red", weight = 4, opacity = 0.8) %>% 
  addCircles(data = paths_sf %>% st_as_sf("nodes"), stroke = F, fillColor = "red", fillOpacity = 0.8, radius = 50)
## Assignment: sample one point from each tract, calculate average travel time from one census tract to all others. Repeat for all tracts.
## See if there is any patterns to the accessibility.

Extract intersections

end_points <- smoothed_net %>% 
  st_as_sf('nodes') %>% 
  st_join(smoothed_net %>% activate("edges") %>% st_as_sf())

intersections <- end_points %>% 
  group_by(.tidygraph_node_index) %>% 
  summarise(n = n()) %>% 
  filter(n > 1) 

leaflet() %>% 
  addProviderTiles(providers$CartoDB.DarkMatter) %>% 
  setView( -84.3854, 33.7768, zoom = 14) %>% # zooming in to show more details
  addPolylines(data = smoothed_net %>% st_as_sf('edges'), col = "grey", weight = 3, opacity = 0.9) %>% 
    addCircles(data = intersections %>% st_as_sf('nodes'), fillColor = 'red', stroke = F, radius = 20, fillOpacity = 0.7) 

Merge it with Census data

Let’s recycle the code for downloading Census data through API from last week.

acs2020c <- acs2020 %>% 
  select(GEOID,
         hhinc = hhincE,
         r_tot = r_totE,
         r_wh = r_whE,
         r_bl = r_blE,
         tot_hh = tot_hhE,
         own_novhc = own_novhcE,
         rent_novhc = rent_novhcE) %>% 
  mutate(pct_wh = r_wh / r_tot,
         pct_bl = r_bl / r_tot,
         pct_novhc = (own_novhc + rent_novhc)/tot_hh) %>% 
  mutate(area1 = unclass(st_area(.))) %>% 
  st_transform(26967) %>% 
  mutate(area2 = unclass(st_area(.))) %>% 
  st_transform(crs = 4326) %>% 
  mutate(ln_pop_den = log((r_tot / (area1/1000^2)) + 1)) %>% 
  filter(!is.na(hhinc), !is.na(r_tot), !is.na(own_novhc))
census_centrality <- acs2020c[bb_sf, ,op = st_within] %>%
  st_join(network_char %>% st_as_sf("nodes")) %>%
  group_by(GEOID) %>%
  summarise(n = n(),
            hhinc = mean(hhinc, na.rm = T),
            pct_wh = mean(pct_wh, na.rm = T),
            pct_bl = mean(pct_bl, na.rm = T),
            pct_novhc = mean(pct_novhc, na.rm = T),
            node_bc = mean(node_bc, na.rm = T))
tm_shape(acs2020c) + tm_polygons(col = "grey", alpha = 0.5) +
  tm_shape(census_centrality) + tm_polygons(col = "node_bc", style = "quantile") +
  tm_shape(bb_sf) + tm_borders()
census_centrality_plot <- census_centrality %>%
  mutate(hhinc = log(hhinc),
         pct_novhc = log(pct_novhc + 0.02)) %>%
  pivot_longer(cols = c('hhinc', 'pct_wh', 'pct_bl', 'pct_novhc'), names_to = "variable", values_to = "value") %>% 
  mutate(variable = factor(variable, labels = c("Household Income", "% Black", "% No HH with no cars", "% White")))

centrality_plot <- census_centrality_plot %>%
  ggplot() +
  geom_point(aes(x = node_bc, y = value), alpha = 0.2) +
  facet_wrap(~variable, scales = "free_y") +
  labs(x = "Centrality", title = "Centrality VS. Socio-demographics") +
  theme_bw()

centrality_plot + ggpubr::stat_cor(aes(x = node_bc, y = value))
## Warning: Removed 636 rows containing non-finite values (stat_cor).
## Warning: Removed 636 rows containing missing values (geom_point).

ggplotly(centrality_plot)